Load libraries
source("../General/rmb_functions.R")
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
✓ ggplot2 3.3.2.9000 ✓ purrr 0.3.4
✓ tibble 3.0.1 ✓ dplyr 1.0.0
✓ tidyr 1.1.0 ✓ stringr 1.4.0
✓ readr 1.3.1 ✓ forcats 0.5.0
── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
source("../General/parameters.R")
library(factoextra)
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
library(ggtree)
Registered S3 method overwritten by 'treeio':
method from
root.phylo ape
ggtree v1.16.6 For help: https://yulab-smu.github.io/treedata-book/
If you use ggtree in published research, please cite the most appropriate paper(s):
- Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(12):3041-3043. doi: 10.1093/molbev/msy194
- Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36, doi:10.1111/2041-210X.12628
Attaching package: ‘ggtree’
The following object is masked from ‘package:tidyr’:
expand
library(cowplot)
********************************************************
Note: As of version 1.0.0, cowplot does not change the
default ggplot2 theme anymore. To recover the previous
behavior, execute:
theme_set(theme_cowplot())
********************************************************
library(tidyverse)
Load data
otu <- readRDS("../Data/otu_pers.RDS")
map <- readRDS("../Data/drought_map.RDS")
rs.fc <- readRDS("../Data/rs_shlfc.RDS")
es.fc <- readRDS("../Data/es_shlfc.RDS")
tax <- readRDS("../Data/tax.RDS") %>%
mutate(PhyClass2 = fct_recode(PhyClass2, "Low Abundance" = "other"))
Remove RF training set from mapping file. Subset mapping files for each compartment
map <- filter(map, Treatment2 != "WC_TRN")
rs.map <- filter(map, Compartment == "RS")
es.map <- filter(map, Compartment == "ES")
Get the relative abundances, reformat, and subset
otu <- rel_ab(otu)
rs.otu.tidy <- tidy_otu(otu) %>%
mutate(Count = Count/100) %>%
filter(SampleID %in% rs.map$SampleID) %>%
filter(!is.na(Count))
es.otu.tidy <- tidy_otu(otu) %>%
mutate(Count = Count/100) %>%
filter(SampleID %in% es.map$SampleID) %>%
filter(!is.na(Count))
Get significant OTUs for each set
rs.sig <- rs.fc %>%
select(-Day) %>%
separate(Contrast, c("Treatment", "Time"), sep = "\\.") %>%
filter(p.adjusted < 0.05) %>%
filter(Treatment != "WC_TRN")
es.sig <- es.fc %>%
select(-Day) %>%
separate(Contrast, c("Treatment", "Time"), sep = "\\.") %>%
filter(p.adjusted < 0.05) %>%
filter(Treatment != "WC_TRN")
For visualization purposes, generate a data frame with the range of drought stress time points for each treatment
trt.lines <- data.frame(Treatment = c("DS1", "DS2", "DS3"),
Treatment2 = c("DS1", "DS2", "DS3"),
Contrast = c("WC vs DS1", "WC vs DS2", "WC vs DS3"),
IniTreatment = c(4.5, 4.5, 4.5),
EndTreatment = c(5.5, 6.5, 7.5))
all.sig <- rbind(mutate(rs.sig, Compartment = "RS"),
mutate(es.sig, Compartment = "ES"))
dao.p <- all.sig %>%
mutate(Contrast = case_when(Treatment == "D1" ~ "WC vs DS1",
Treatment == "D2" ~ "WC vs DS2",
Treatment == "D3" ~ "WC vs DS3")) %>%
mutate(Compartment = ifelse(Compartment == "RS", "Rhizosphere", "Endosphere")) %>%
mutate(Compartment = fct_relevel(Compartment, "Rhizosphere")) %>%
ggplot(aes(as.numeric(Time) * 10)) +
geom_bar() +
geom_vline(data = trt.lines, aes(xintercept = IniTreatment * 10), linetype = 3) +
geom_vline(data = trt.lines, aes(xintercept = EndTreatment * 10), linetype = 3) +
scale_fill_manual(values = resp.pal) +
xlab("Plant Age (days)") +
ylab("Number of OTUs") +
facet_grid(. ~ Compartment + Contrast) +
theme_classic() +
theme(text = element_text(size = 15))
dao.p

Define the clustering methods to test
m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")
RHIZOSPHERE
z-transform relative abundances; calculate the mean z-score for each treatment, and time point combination; generate a matrix for hierarchical clustering
rs.zs.tidy <- rs.otu.tidy %>%
inner_join(map, by = "SampleID") %>%
filter(OTU_ID %in% rs.sig$OTU_ID) %>%
group_by(OTU_ID) %>%
mutate(zscore = (Count - mean(Count))/sd(Count)) %>%
group_by(Treatment, Time, OTU_ID, Age) %>%
summarise(MeanZS = mean(zscore)) %>%
ungroup()
`summarise()` regrouping output by 'Treatment', 'Time', 'OTU_ID' (override with `.groups` argument)
rs.zs.mtx <- rs.zs.tidy %>%
mutate(Group = paste(Treatment, Time, sep = ".")) %>%
select(Group, MeanZS, OTU_ID) %>%
spread(key = Group, value = MeanZS)
rs.zs.mtx <- as.data.frame(rs.zs.mtx)
rownames(rs.zs.mtx) <- rs.zs.mtx$OTU_ID
rs.zs.mtx <- rs.zs.mtx[,-1]
###################################################################
rs.fc.mtx <- rs.fc %>%
separate(Contrast, c("Treatment2", "Time"), sep = "\\.") %>%
mutate(Treatment = Treatment2) %>%
filter(Treatment2 != "WC_TRN") %>%
filter(OTU_ID %in% rs.sig$OTU_ID) %>%
mutate(Contrast = paste(Treatment2, Time, sep = ".")) %>%
select(Contrast, OTU_ID, estimate) %>%
spread(key = Contrast, value = estimate)
rs.fc.mtx <- as.data.frame(rs.fc.mtx)
rownames(rs.fc.mtx) <- rs.fc.mtx$OTU_ID
rs.fc.mtx <- rs.fc.mtx[,-1]
Choose a clustering method and number of clusters Approach based on: https://uc-r.github.io/hc_clustering
# See what values of k might be worth testing
fviz_nbclust(rs.zs.mtx, FUN = hcut, method = "wss")
Registered S3 method overwritten by 'data.table':
method from
print.data.table

# See which method gets you the best clustering
rs.ac <- function(x) {
agnes(rs.zs.mtx, method = x)$ac
}
map_dbl(m, rs.ac)
average single complete ward
0.5414037 0.3187765 0.7020742 0.9513128
###################################################################
# See what values of k might be worth testing
fviz_nbclust(rs.fc.mtx, FUN = hcut, method = "wss")

# See which method gets you the best clustering
rs.ac <- function(x) {
agnes(rs.fc.mtx, method = x)$ac
}
map_dbl(m, rs.ac)
average single complete ward
0.6296994 0.5583461 0.7871469 0.9502967
It seems the elbow is around 3 so test 2:4 by changing the value of the rs.k variable Perform hierarchical clustering and get the set of clusters and order of OTUs
rs.k <- 4
rs.dist <- dist(as.matrix(rs.zs.mtx))
rs.clust <- hclust(rs.dist, method = "ward.D")
rs.ord.names <- rs.clust$labels[rs.clust$order]
rs.ord <- data.frame(OTU_ID = rs.ord.names, order = 1:length(rs.ord.names))
rs.cut <- cutree(rs.clust[c(1,2,4)], k = rs.k)
rs.ord$Cluster <- as.factor(rs.cut[rs.ord$OTU_ID])
rs.ord <- rs.ord %>%
mutate(Cluster = paste("C", Cluster, sep = "")) %>%
group_by(Cluster) %>%
mutate(nOTU = n()) %>%
ungroup() %>%
mutate(Cluster2 = paste(Cluster, " (", nOTU, " OTUs)", sep = ""))
###################################################################
rs.k <- 4
rs.dist <- dist(as.matrix(rs.fc.mtx))
rs.clust <- hclust(rs.dist, method = "ward.D")
rs.ord.names <- rs.clust$labels[rs.clust$order]
rs.ord <- data.frame(OTU_ID = rs.ord.names, order = 1:length(rs.ord.names))
rs.cut <- cutree(rs.clust[c(1,2,4)], k = rs.k)
rs.ord$Cluster <- as.factor(rs.cut[rs.ord$OTU_ID])
rs.ord <- rs.ord %>%
mutate(Cluster = paste("C", Cluster, sep = "")) %>%
group_by(Cluster) %>%
mutate(nOTU = n()) %>%
ungroup() %>%
mutate(Cluster2 = paste(Cluster, " (", nOTU, " OTUs)", sep = ""))
Visualize as heatmap
rs.hm.p <- rs.zs.tidy %>%
inner_join(rs.ord, by = "OTU_ID") %>%
mutate(Treatment = str_replace(Treatment, "D", "DS")) %>%
mutate(Treatment = fct_relevel(Treatment, "WC")) %>%
ggplot(aes(Time*10, reorder(OTU_ID, order), fill = MeanZS)) +
geom_tile() +
geom_vline(data = trt.lines, aes(xintercept = IniTreatment*10), linetype = 3, color = "white") +
geom_vline(data = trt.lines, aes(xintercept = EndTreatment*10), linetype = 3, color = "white") +
scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
#scale_fill_distiller(palette = "Spectral") +
ylab("Differentially Abundant OTU") +
xlab("Plant Age (days)") +
facet_grid(Cluster ~ Treatment, scales = "free", space = "free") +
theme_classic() +
theme(text = element_text(size = 15),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "bottom",
legend.title.align = 1)
rs.hm.p

#################################
rs.fc %>%
separate(Contrast, c("Treatment2", "Time"), sep = "\\.") %>%
mutate(Treatment = Treatment2) %>%
filter(Treatment2 != "WC_TRN") %>%
mutate(Time = as.numeric(Time)) %>%
inner_join(rs.ord, by = "OTU_ID") %>%
mutate(Treatment = str_replace(Treatment, "D", "DS")) %>%
mutate(Treatment = fct_relevel(Treatment, "WC")) %>%
mutate(estimate2 = ifelse(abs(estimate) >5, 5 * sign(estimate), estimate)) %>%
ggplot(aes(Time*10, reorder(OTU_ID, order), fill = estimate2)) +
geom_tile() +
geom_vline(data = trt.lines, aes(xintercept = IniTreatment*10), linetype = 3, color = "white") +
geom_vline(data = trt.lines, aes(xintercept = EndTreatment*10), linetype = 3, color = "white") +
#scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
scale_fill_gradientn(name = "log2FC",
colors = RColorBrewer::brewer.pal(11, "BrBG")) +
#scale_fill_distiller(palette = "Spectral") +
ylab("Differentially Abundant OTU") +
xlab("Plant Age (days)") +
facet_grid(Cluster ~ Treatment, scales = "free", space = "free") +
theme_classic() +
theme(text = element_text(size = 15),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "bottom",
legend.title.align = 1)
Unknown levels in `f`: WC

Visualize the aggregated relative abundances
rs.agg.ra <- rs.otu.tidy %>%
inner_join(rs.ord, by = "OTU_ID") %>%
group_by(Cluster, SampleID) %>%
summarise(AggRelAb = sum(Count)) %>%
inner_join(rs.map, by = "SampleID") %>%
ungroup() %>%
mutate(Treatment = str_replace(Treatment, "D", "DS")) %>%
mutate(Treatment = fct_relevel(Treatment, "WC"))
`summarise()` regrouping output by 'Cluster' (override with `.groups` argument)
rs.agg.means <- rs.agg.ra %>%
group_by(Treatment, Compartment, Time, Cluster) %>%
mutate(MeanRelAb = mean(AggRelAb)) %>%
ungroup()
rs.clstr.p <- rs.agg.ra %>%
ggplot(aes(Age, AggRelAb, color = Treatment)) +
geom_point(stat = "identity", position = "dodge", alpha = 0.5, shape = 1) +
geom_line(data = rs.agg.means, aes(y = MeanRelAb), size = 1) +
geom_vline(xintercept = ws.line, linetype = 3) +
scale_color_manual(values = trt.pal, name = "Treatment") +
guides(color = guide_legend(ncol = 2)) +
ylab("Relative Abundance") +
xlab("Plant Age (days)") +
facet_wrap(~ Cluster, ncol = 1, scales = "free") +
theme_classic() +
theme(legend.position = "bottom",
text = element_text(size = 15))
rs.clstr.p

plot_grid(rs.hm.p, rs.clstr.p,
nrow = 1,
rel_widths = c(4,3),
align = "h",
axis = "b",
labels = c("A", "B"),
label_size = 20)
Width not defined. Set with `position_dodge(width = ?)`

ENDOSPHERE
z-transform relative abundances, calculate the mean z-score for each treatment, and time point combination, generate a matrix for hierarchical clustering
es.zs.tidy <- es.otu.tidy %>%
inner_join(map, by = "SampleID") %>%
filter(OTU_ID %in% es.sig$OTU_ID) %>%
group_by(OTU_ID) %>%
mutate(zscore = (Count - mean(Count))/sd(Count)) %>%
group_by(Treatment, Time, OTU_ID, Age) %>%
summarise(MeanZS = mean(zscore)) %>%
ungroup()
`summarise()` regrouping output by 'Treatment', 'Time', 'OTU_ID' (override with `.groups` argument)
es.zs.mtx <- es.zs.tidy %>%
mutate(Group = paste(Treatment, Time, sep = ".")) %>%
select(Group, MeanZS, OTU_ID) %>%
spread(key = Group, value = MeanZS)
es.zs.mtx <- as.data.frame(es.zs.mtx)
rownames(es.zs.mtx) <- es.zs.mtx$OTU_ID
es.zs.mtx <- es.zs.mtx[,-1]
###################################################################
es.fc.mtx <- es.fc %>%
separate(Contrast, c("Treatment2", "Time"), sep = "\\.") %>%
mutate(Treatment = Treatment2) %>%
filter(Treatment2 != "WC_TRN") %>%
filter(OTU_ID %in% es.sig$OTU_ID) %>%
mutate(Contrast = paste(Treatment2, Time, sep = ".")) %>%
select(Contrast, OTU_ID, estimate) %>%
spread(key = Contrast, value = estimate)
es.fc.mtx <- as.data.frame(es.fc.mtx)
rownames(es.fc.mtx) <- es.fc.mtx$OTU_ID
es.fc.mtx <- es.fc.mtx[,-1]
Choose a clustering method and number of clusters
fviz_nbclust(es.zs.mtx, FUN = hcut, method = "wss")

es.ac <- function(x) {
agnes(es.zs.mtx, method = x)$ac
}
map_dbl(m, es.ac)
average single complete ward
0.5642509 0.3817472 0.7234563 0.9242376
###################################################################
# See what values of k might be worth testing
fviz_nbclust(es.fc.mtx, FUN = hcut, method = "wss")

# See which method gets you the best clustering
es.ac <- function(x) {
agnes(es.fc.mtx, method = x)$ac
}
map_dbl(m, es.ac)
average single complete ward
0.6473055 0.5958731 0.7704513 0.9282877
Perform hierarchical clustering and get the set of clusters and order of OTUs
es.k <- 6
es.dist <- dist(as.matrix(es.zs.mtx))
es.clust <- hclust(es.dist, method = "ward.D")
es.ord.names <- es.clust$labels[es.clust$order]
es.ord <- data.frame(OTU_ID = es.ord.names, order = 1:length(es.ord.names))
es.cut <- cutree(es.clust[c(1,2,4)], k = es.k)
es.ord$Cluster <- as.factor(es.cut[es.ord$OTU_ID])
es.ord <- es.ord %>%
mutate(Cluster = paste("C", Cluster, sep = "")) %>%
group_by(Cluster) %>%
mutate(nOTU = n()) %>%
ungroup() %>%
mutate(Cluster2 = paste(Cluster, " (", nOTU, " OTUs)", sep = ""))
###################################################################
es.k <- 5
es.dist <- dist(as.matrix(es.fc.mtx))
es.clust <- hclust(es.dist, method = "ward.D")
es.ord.names <- es.clust$labels[es.clust$order]
es.ord <- data.frame(OTU_ID = es.ord.names, order = 1:length(es.ord.names))
es.cut <- cutree(es.clust[c(1,2,4)], k = es.k)
es.ord$Cluster <- as.factor(es.cut[es.ord$OTU_ID])
es.ord <- es.ord %>%
mutate(Cluster = paste("C", Cluster, sep = "")) %>%
group_by(Cluster) %>%
mutate(nOTU = n()) %>%
ungroup() %>%
mutate(Cluster2 = paste(Cluster, " (", nOTU, " OTUs)", sep = ""))
Visualize as heatmap
es.hm.p <- es.zs.tidy %>%
inner_join(es.ord, by = "OTU_ID") %>%
mutate(Treatment = str_replace(Treatment, "D", "DS")) %>%
mutate(Treatment = fct_relevel(Treatment, "WC")) %>%
ggplot(aes(Time*10, reorder(OTU_ID, order), fill = MeanZS)) +
geom_tile() +
geom_vline(data = trt.lines, aes(xintercept = IniTreatment*10), linetype = 3, color = "white") +
geom_vline(data = trt.lines, aes(xintercept = EndTreatment*10), linetype = 3, color = "white") +
scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
#scale_fill_distiller(palette = "Spectral") +
ylab("Differentially Abundant OTU") +
xlab("Plant Age (days)") +
facet_grid(Cluster ~ Treatment, scales = "free", space = "free") +
theme_classic() +
theme(text = element_text(size = 15),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "bottom",
legend.title.align = 1)
es.hm.p

#################################
es.fc %>%
separate(Contrast, c("Treatment2", "Time"), sep = "\\.") %>%
mutate(Treatment = Treatment2) %>%
filter(Treatment2 != "WC_TRN") %>%
mutate(Time = as.numeric(Time)) %>%
inner_join(es.ord, by = "OTU_ID") %>%
mutate(Treatment = str_replace(Treatment, "D", "DS")) %>%
mutate(Treatment = fct_relevel(Treatment, "WC")) %>%
mutate(estimate2 = ifelse(abs(estimate) >5, 5 * sign(estimate), estimate)) %>%
ggplot(aes(Time*10, reorder(OTU_ID, order), fill = estimate2)) +
geom_tile() +
geom_vline(data = trt.lines, aes(xintercept = IniTreatment*10), linetype = 3, color = "white") +
geom_vline(data = trt.lines, aes(xintercept = EndTreatment*10), linetype = 3, color = "white") +
#scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
scale_fill_gradientn(name = "log2FC",
colors = RColorBrewer::brewer.pal(11, "BrBG")) +
#scale_fill_distiller(palette = "Spectral") +
ylab("Differentially Abundant OTU") +
xlab("Plant Age (days)") +
facet_grid(Cluster ~ Treatment, scales = "free", space = "free") +
theme_classic() +
theme(text = element_text(size = 15),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "bottom",
legend.title.align = 1)
Unknown levels in `f`: WC

Visualize the aggregated relative abundances
es.agg.ra <- es.otu.tidy %>%
inner_join(es.ord, by = "OTU_ID") %>%
group_by(Cluster, SampleID) %>%
summarise(AggRelAb = sum(Count)) %>%
inner_join(es.map, by = "SampleID") %>%
ungroup() %>%
mutate(Treatment = str_replace(Treatment, "D", "DS")) %>%
mutate(Treatment = fct_relevel(Treatment, "WC"))
`summarise()` regrouping output by 'Cluster' (override with `.groups` argument)
es.agg.means <- es.agg.ra %>%
group_by(Treatment, Compartment, Time, Cluster) %>%
mutate(MeanRelAb = mean(AggRelAb)) %>%
ungroup()
es.clstr.p <- es.agg.ra %>%
ggplot(aes(Age, AggRelAb, color = Treatment)) +
geom_point(stat = "identity", position = "dodge", alpha = 0.5, shape = 1) +
geom_line(data = es.agg.means, aes(y = MeanRelAb), size = 1) +
geom_vline(xintercept = ws.line, linetype = 3) +
scale_color_manual(values = trt.pal, name = "Treatment") +
guides(color = guide_legend(ncol = 2)) +
ylab("Relative Abundance") +
xlab("Plant Age (days)") +
facet_wrap(~ Cluster, ncol = 1, scales = "free") +
theme_classic() +
theme(legend.position = "bottom",
text = element_text(size = 15))
es.clstr.p

plot_grid(es.hm.p, es.clstr.p,
nrow = 1,
rel_widths = c(3,2),
align = "h",
axis = "b",
labels = c("A", "B"),
label_size = 20)
Width not defined. Set with `position_dodge(width = ?)`

all.ord <- rbind(mutate(rs.ord, Compartment = "RS"),
mutate(es.ord, Compartment = "ES"))
rbind(mutate(rs.fc, Compartment = "RS"),
mutate(es.fc, Compartment = "ES")) %>%
separate(Contrast, c("Treatment2", "Time"), sep = "\\.", remove = F) %>%
mutate(Treatment = Treatment2) %>%
filter(Treatment2 != "WC_TRN") %>%
mutate(Time = as.numeric(Time)) %>%
inner_join(all.ord, by = c("Compartment", "OTU_ID")) %>%
mutate(Contrast = case_when(Treatment == "D1" ~ "WC vs DS1",
Treatment == "D2" ~ "WC vs DS2",
Treatment == "D3" ~ "WC vs DS3")) %>%
mutate(Compartment = fct_relevel(Compartment, "RS")) %>%
mutate(estimate2 = ifelse(abs(estimate) >4, 4 * sign(estimate), estimate)) %>%
ggplot(aes(Time*10, reorder(OTU_ID, order), fill = estimate2)) +
geom_tile() +
geom_vline(data = trt.lines, aes(xintercept = IniTreatment * 10), linetype = 3) +
geom_vline(data = trt.lines, aes(xintercept = EndTreatment * 10), linetype = 3) +
#scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
scale_fill_gradientn(name = "log2FC",
colors = RColorBrewer::brewer.pal(11, "BrBG")) +
#scale_fill_distiller(palette = "Spectral") +
ylab("Differentially Abundant OTU") +
xlab("Plant Age (days)") +
facet_grid(Compartment + Cluster ~ Contrast, scales = "free", space = "free") +
theme_classic() +
theme(text = element_text(size = 15),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "bottom",
legend.title.align = 1)

all.ord %>%
inner_join(tax, by = "OTU_ID") %>%
ggplot(aes(1, fill = PhyClass2)) +
geom_bar(position = "stack") +
facet_grid(Compartment + Cluster ~ ., scales = "free", space = "free") +
scale_fill_manual(name = "Taxon",
values = phy.pal[c(11:14,2:10,1)],
limits = levels(tax$PhyClass2)[c(11:14,2:10,1)]) +
theme_classic() +
theme(text = element_text(size = 15),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.y = element_blank(),
legend.position = "bottom",
legend.title.align = 1)

Generate a data frame with all clustering results, and label the interesting clusters for downstreama analyses Genearte a data frame with all the mean z-scores for plotting
all.clust.summary <- rbind(mutate(rs.ord, Compartment = "RS"),
mutate(es.ord, Compartment = "ES")) %>%
mutate(Cluster = paste(Compartment, Cluster, sep = "")) %>%
filter(Cluster %in% c("RSC1", "RSC2", "RSC4", "ESC3", "ESC5")) %>%
mutate(Trend = case_when(Cluster == "RSC1" ~ "Transient Enrichment",
Cluster == "RSC2" ~ "Persistent Depletion",
Cluster == "RSC4" ~ "Transient Depletion",
Cluster == "ESC3" ~ "Semi-Persistent Enrichment",
Cluster == "ESC5" ~ "Persistent Depletion"))
all.cluster.total <- all.clust.summary %>%
group_by(Cluster, nOTU, Cluster2, Compartment, Trend) %>%
count() %>%
select(-n) %>%
ungroup() %>%
mutate(Compartment = fct_recode(Compartment,
"Rhizosphere" = "RS",
"Endosphere" = "ES")) %>%
mutate(Compartment = fct_relevel(Compartment, "Rhizosphere"))
all.agg.ra <- rbind(mutate(rs.agg.ra, Compartment = "RS"),
mutate(es.agg.ra, Compartment = "ES")) %>%
mutate(Cluster = paste(Compartment, Cluster, sep = ""))
saveRDS(all.clust.summary, "../Data/drought_clusters.RDS")
###############################################################################
all.clust.summary <- rbind(mutate(rs.ord, Compartment = "RS"),
mutate(es.ord, Compartment = "ES")) %>%
mutate(Cluster = paste(Compartment, Cluster, sep = "")) %>%
filter(Cluster %in% c("RSC1", "RSC2", "RSC4", "ESC3", "ESC4")) %>%
mutate(Trend = case_when(Cluster == "RSC1" ~ "Transient Enrichment",
Cluster == "RSC2" ~ "Transient Depletion",
Cluster == "RSC4" ~ "Persistent Depletion",
Cluster == "ESC3" ~ "Semi-Persistent Enrichment",
Cluster == "ESC4" ~ "Persistent Depletion"))
all.cluster.total <- all.clust.summary %>%
group_by(Cluster, nOTU, Cluster2, Compartment, Trend) %>%
count() %>%
select(-n) %>%
ungroup() %>%
mutate(Compartment = fct_recode(Compartment,
"Rhizosphere" = "RS",
"Endosphere" = "ES")) %>%
mutate(Compartment = fct_relevel(Compartment, "Rhizosphere"))
all.agg.ra <- rbind(mutate(rs.agg.ra, Compartment = "RS"),
mutate(es.agg.ra, Compartment = "ES")) %>%
mutate(Cluster = paste(Compartment, Cluster, sep = ""))
saveRDS(all.clust.summary, "../Data/drought_clusters.RDS")
all.cluster.means <- all.agg.ra %>%
mutate(Compartment = fct_recode(Compartment,
"Rhizosphere" = "RS",
"Endosphere" = "ES")) %>%
mutate(Compartment = fct_relevel(Compartment, "Rhizosphere")) %>%
inner_join(select(all.cluster.total, -Compartment), by = "Cluster") %>%
group_by(Compartment, Time, Age, Treatment2, Trend) %>%
summarise(MeanRelAb = mean(AggRelAb)) %>%
ungroup() %>%
mutate(Treatment2 = str_replace(Treatment2, "D", "DS")) %>%
mutate(Treatment2 = fct_relevel(Treatment2, "WC"))
`summarise()` regrouping output by 'Compartment', 'Time', 'Age', 'Treatment2' (override with `.groups` argument)
all.cluster.ab <- all.agg.ra %>%
mutate(Compartment = fct_recode(Compartment,
"Rhizosphere" = "RS",
"Endosphere" = "ES")) %>%
mutate(Compartment = fct_relevel(Compartment, "Rhizosphere")) %>%
inner_join(select(all.cluster.total, -Compartment), by = "Cluster") %>%
mutate(Treatment2 = str_replace(Treatment2, "D", "DS")) %>%
mutate(Treatment2 = fct_relevel(Treatment2, "WC"))
all.cluster.tax <- all.clust.summary %>%
mutate(Compartment = fct_relevel(Compartment, "Rhizosphere")) %>%
group_by(Compartment, Cluster, OTU_ID) %>%
count() %>%
ungroup() %>%
inner_join(tax, by = "OTU_ID") %>%
inner_join(select(all.clust.summary, OTU_ID, Trend, Compartment), by = c("Compartment", "OTU_ID")) %>%
mutate(Compartment = fct_recode(Compartment,
"Rhizosphere" = "RS",
"Endosphere" = "ES")) %>%
mutate(Compartment = fct_relevel(Compartment, "Rhizosphere"))
Unknown levels in `f`: Rhizosphere
all.ab <- all.cluster.ab %>%
ggplot(aes(Age, AggRelAb, color = Treatment2)) +
geom_point(alpha = 1, shape = 1, size = 2) +
geom_vline(xintercept = ws.line, linetype = 3) +
geom_line(data = all.cluster.means, aes(y = MeanRelAb), size = 1) +
ylab("Cumulative Relative Abundance") +
xlab("Plant Age (days)") +
facet_wrap(~ Compartment + Trend, scales = "free") +
scale_color_manual(values = trt.pal, name = "Treatment") +
guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
theme_classic() +
theme(legend.position = c(0.8,0.2),
text = element_text(size = 15))
all.ab

all.tax.p <- all.cluster.tax %>%
group_by(Compartment, Trend, PhyClass2) %>%
summarise(Count = sum(n)) %>%
group_by(Compartment, Trend) %>%
mutate(Fraction = Count / sum(Count)) %>%
mutate(ymax = cumsum(Fraction),
nPhy = n()) %>%
mutate(ymin = c(0, ymax[1:nPhy - 1])) %>%
ggplot() +
geom_rect(aes(ymax=ymax, ymin=ymin, xmax= 4, xmin= 3, fill= PhyClass2)) +
geom_text(data = all.cluster.total, aes(2, 0, label = nOTU), size = 7) +
scale_fill_manual(name = "Taxon",
values = phy.pal[c(11:14,2:10,1)],
limits = levels(tax$PhyClass2)[c(11:14,2:10,1)]) +
guides(fill = guide_legend(ncol = 4)) +
coord_polar(theta="y") +
xlim(c(2, 4)) +
facet_wrap(. ~ Compartment + Trend, nrow = 1) +
theme_void() +
theme(text = element_text(size = 15),
legend.position = "bottom",
legend.title = element_blank(),
strip.background = element_blank(),
strip.text = element_blank())
`summarise()` regrouping output by 'Compartment', 'Trend' (override with `.groups` argument)
numerical expression has 7 elements: only the first usednumerical expression has 12 elements: only the first usednumerical expression has 14 elements: only the first usednumerical expression has 11 elements: only the first usednumerical expression has 3 elements: only the first used
all.tax.p

all.clust.p <- plot_grid(all.ab, all.tax.p,
ncol = 1,
rel_heights = c(6,4),
align = "v",
axis = "lr",
labels = "B",
label_size = 20)
number of items to replace is not a multiple of replacement length
plot_grid(dao.p, all.ab, all.tax.p,
ncol = 1,
rel_heights = c(1,2,1),
labels = c("A", "B", NA),
label_size = 20) ###766:936
number of items to replace is not a multiple of replacement length

all.cluster.tax %>%
group_by(Compartment, Trend, Phylum, PhyClass2, Class, Order) %>%
count() %>%
filter(n > 1) %>%
mutate(Taxonomy = paste(Phylum, Class, Order, sep = " / ")) %>%
ggplot(aes(Trend, Taxonomy, fill = PhyClass2)) +
geom_tile(color = "white", size = 1) +
geom_text(aes(label = n)) +
scale_fill_manual(name = "Taxon",
values = phy.pal,
limits = levels(tax$PhyClass2)) +
facet_grid(. ~ Compartment, scales = "free", space = "free") +
theme_minimal() +
theme(text = element_text(size = 15),
axis.text.x = element_text(angle = 45, hjust = 1))
Using `n` as weighting variable
ℹ Quiet this message with `wt = n` or count rows with `wt = 1`

---
title: "OTU Clustering"
output: html_notebook
---

Load libraries
```{r}
source("../General/rmb_functions.R")
source("../General/parameters.R")
library(factoextra)
library(cluster)
library(ggtree)
library(cowplot)
library(tidyverse)
```

Load data
```{r}
otu <- readRDS("../Data/otu_pers.RDS")
map <- readRDS("../Data/drought_map.RDS")
rs.fc <- readRDS("../Data/rs_shlfc.RDS")
es.fc <- readRDS("../Data/es_shlfc.RDS")
tax <- readRDS("../Data/tax.RDS") %>% 
  mutate(PhyClass2 = fct_recode(PhyClass2, "Low Abundance" = "other"))
```

Remove RF training set from mapping file. Subset mapping files for each compartment
```{r}
map <- filter(map, Treatment2 != "WC_TRN")
rs.map <- filter(map, Compartment == "RS")
es.map <- filter(map, Compartment == "ES")
```

Get the relative abundances, reformat, and subset
```{r}
otu <- rel_ab(otu)

rs.otu.tidy <- tidy_otu(otu) %>% 
  mutate(Count = Count/100) %>% 
  filter(SampleID %in% rs.map$SampleID) %>% 
  filter(!is.na(Count)) 

es.otu.tidy <- tidy_otu(otu) %>% 
  mutate(Count = Count/100) %>% 
  filter(SampleID %in% es.map$SampleID) %>% 
  filter(!is.na(Count)) 
```

Get significant OTUs for each set
```{r}
rs.sig <- rs.fc %>% 
  select(-Day) %>% 
  separate(Contrast, c("Treatment", "Time"), sep = "\\.") %>% 
  filter(p.adjusted < 0.05) %>% 
  filter(Treatment != "WC_TRN") 

es.sig <- es.fc %>% 
  select(-Day) %>% 
  separate(Contrast, c("Treatment", "Time"), sep = "\\.") %>% 
  filter(p.adjusted < 0.05) %>% 
  filter(Treatment != "WC_TRN") 
```

For visualization purposes, generate a data frame with the range of drought stress time points for each treatment 
```{r}
trt.lines <- data.frame(Treatment = c("DS1", "DS2", "DS3"),
                        Treatment2 = c("DS1", "DS2", "DS3"),
                        Contrast = c("WC vs DS1", "WC vs DS2", "WC vs DS3"),
                        IniTreatment = c(4.5, 4.5, 4.5),
                        EndTreatment = c(5.5, 6.5, 7.5))
```


```{r}
all.sig <- rbind(mutate(rs.sig, Compartment = "RS"),
                 mutate(es.sig, Compartment = "ES"))

dao.p <- all.sig %>% 
  mutate(Contrast = case_when(Treatment == "D1" ~ "WC vs DS1",
                              Treatment == "D2" ~ "WC vs DS2",
                              Treatment == "D3" ~ "WC vs DS3")) %>% 
  mutate(Compartment = ifelse(Compartment == "RS", "Rhizosphere", "Endosphere")) %>% 
  mutate(Compartment = fct_relevel(Compartment, "Rhizosphere")) %>% 
  ggplot(aes(as.numeric(Time) * 10)) +
  geom_bar() +
  geom_vline(data = trt.lines, aes(xintercept = IniTreatment * 10), linetype = 3) +
  geom_vline(data = trt.lines, aes(xintercept = EndTreatment * 10), linetype = 3) +
  scale_fill_manual(values = resp.pal) +
  xlab("Plant Age (days)") +
  ylab("Number of OTUs") +
  facet_grid(. ~ Compartment + Contrast) +
  theme_classic() +
  theme(text = element_text(size = 15))

dao.p
```
```{r}
time.age <- map %>% 
  group_by(Time, Age) %>% 
  count() %>% 
  ungroup() %>% 
  select(-n)

dao.supp <- all.sig %>% 
  mutate(Time = as.integer(Time)) %>% 
  mutate(Contrast = paste("WC_vs_", Treatment, sep = "")) %>% 
  select(-Treatment) %>% 
  inner_join(time.age, by = "Time") %>% 
  select(Compartment, Contrast, Time, Age, everything()) %>% 
  inner_join(tax, by = "OTU_ID") %>% 
  select(-PhyClass, -PhyClass2)

write.table(dao.supp, "../Tables/dao.tsv", sep = "\t", quote = F, row.names = F)
```


Define the clustering methods to test
```{r}
m <- c( "average", "single", "complete", "ward")
names(m) <- c( "average", "single", "complete", "ward")
```


RHIZOSPHERE

z-transform relative abundances; calculate the mean z-score for each treatment, and time point combination; generate a matrix for hierarchical clustering
```{r}
rs.zs.tidy <- rs.otu.tidy %>%
  inner_join(map, by = "SampleID") %>%
  filter(OTU_ID %in% rs.sig$OTU_ID) %>%
  group_by(OTU_ID) %>%
  mutate(zscore = (Count - mean(Count))/sd(Count)) %>%
  group_by(Treatment, Time, OTU_ID, Age) %>%
  summarise(MeanZS = mean(zscore)) %>%
  ungroup()

rs.zs.mtx <- rs.zs.tidy %>% 
    mutate(Group = paste(Treatment, Time, sep = ".")) %>% 
    select(Group, MeanZS, OTU_ID) %>% 
    spread(key = Group, value = MeanZS)
rs.zs.mtx <- as.data.frame(rs.zs.mtx) 
rownames(rs.zs.mtx) <- rs.zs.mtx$OTU_ID 
rs.zs.mtx <- rs.zs.mtx[,-1] 

###################################################################

rs.fc.mtx <- rs.fc %>% 
  separate(Contrast, c("Treatment2", "Time"), sep = "\\.") %>% 
  mutate(Treatment = Treatment2) %>% 
  filter(Treatment2 != "WC_TRN") %>% 
  filter(OTU_ID %in% rs.sig$OTU_ID) %>% 
  mutate(Contrast = paste(Treatment2, Time, sep = ".")) %>% 
  select(Contrast, OTU_ID, estimate) %>% 
  spread(key = Contrast, value = estimate)
rs.fc.mtx <- as.data.frame(rs.fc.mtx) 
rownames(rs.fc.mtx) <- rs.fc.mtx$OTU_ID 
rs.fc.mtx <- rs.fc.mtx[,-1] 
```

Choose a clustering method and number of clusters
Approach based on: https://uc-r.github.io/hc_clustering
```{r}
# See what values of k might be worth testing
fviz_nbclust(rs.zs.mtx, FUN = hcut, method = "wss")

# See which method gets you the best clustering
rs.ac <- function(x) {
  agnes(rs.zs.mtx, method = x)$ac
}
map_dbl(m, rs.ac)


###################################################################
# See what values of k might be worth testing
fviz_nbclust(rs.fc.mtx, FUN = hcut, method = "wss")

# See which method gets you the best clustering
rs.ac <- function(x) {
  agnes(rs.fc.mtx, method = x)$ac
}
map_dbl(m, rs.ac)
```

It seems the elbow is around 3 so test 2:4 by changing the value of the rs.k variable
Perform hierarchical clustering and get the set of clusters and order of OTUs
```{r}
rs.k <- 4

rs.dist <- dist(as.matrix(rs.zs.mtx)) 
rs.clust <- hclust(rs.dist, method = "ward.D") 
rs.ord.names <- rs.clust$labels[rs.clust$order] 
rs.ord <- data.frame(OTU_ID = rs.ord.names, order = 1:length(rs.ord.names))

rs.cut <- cutree(rs.clust[c(1,2,4)], k = rs.k)
rs.ord$Cluster <- as.factor(rs.cut[rs.ord$OTU_ID])

rs.ord <- rs.ord %>% 
  mutate(Cluster = paste("C", Cluster, sep = "")) %>% 
  group_by(Cluster) %>% 
  mutate(nOTU = n()) %>% 
  ungroup() %>% 
  mutate(Cluster2 = paste(Cluster, " (", nOTU, " OTUs)", sep = ""))

###################################################################

rs.k <- 4

rs.dist <- dist(as.matrix(rs.fc.mtx)) 
rs.clust <- hclust(rs.dist, method = "ward.D") 
rs.ord.names <- rs.clust$labels[rs.clust$order] 
rs.ord <- data.frame(OTU_ID = rs.ord.names, order = 1:length(rs.ord.names))

rs.cut <- cutree(rs.clust[c(1,2,4)], k = rs.k)
rs.ord$Cluster <- as.factor(rs.cut[rs.ord$OTU_ID])

rs.ord <- rs.ord %>% 
  mutate(Cluster = paste("C", Cluster, sep = "")) %>% 
  group_by(Cluster) %>% 
  mutate(nOTU = n()) %>% 
  ungroup() %>% 
  mutate(Cluster2 = paste(Cluster, " (", nOTU, " OTUs)", sep = ""))
```

Visualize as heatmap
```{r}
rs.hm.p <- rs.zs.tidy %>% 
  inner_join(rs.ord, by = "OTU_ID") %>% 
  mutate(Treatment = str_replace(Treatment, "D", "DS")) %>% 
  mutate(Treatment = fct_relevel(Treatment, "WC")) %>% 
  ggplot(aes(Time*10, reorder(OTU_ID, order), fill = MeanZS)) +
  geom_tile() +
  geom_vline(data = trt.lines, aes(xintercept = IniTreatment*10), linetype = 3, color = "white") +
  geom_vline(data = trt.lines, aes(xintercept = EndTreatment*10), linetype = 3, color = "white") +
  scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
  #scale_fill_distiller(palette = "Spectral") +
  ylab("Differentially Abundant OTU") +
  xlab("Plant Age (days)") +
  facet_grid(Cluster ~ Treatment, scales = "free", space = "free") +
  theme_classic() +
  theme(text = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        legend.position = "bottom",
        legend.title.align = 1)

rs.hm.p

#################################
rs.fc %>% 
  separate(Contrast, c("Treatment2", "Time"), sep = "\\.") %>% 
  mutate(Treatment = Treatment2) %>% 
  filter(Treatment2 != "WC_TRN") %>% 
  mutate(Time = as.numeric(Time)) %>% 
  inner_join(rs.ord, by = "OTU_ID") %>% 
  mutate(Treatment = str_replace(Treatment, "D", "DS")) %>% 
  mutate(Treatment = fct_relevel(Treatment, "WC")) %>% 
  mutate(estimate2 = ifelse(abs(estimate) >5, 5 * sign(estimate), estimate)) %>% 
  ggplot(aes(Time*10, reorder(OTU_ID, order), fill = estimate2)) +
  geom_tile() +
  geom_vline(data = trt.lines, aes(xintercept = IniTreatment*10), linetype = 3, color = "white") +
  geom_vline(data = trt.lines, aes(xintercept = EndTreatment*10), linetype = 3, color = "white") +
  #scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
  scale_fill_gradientn(name = "log2FC",
                       colors = RColorBrewer::brewer.pal(11, "BrBG")) +
  #scale_fill_distiller(palette = "Spectral") +
  ylab("Differentially Abundant OTU") +
  xlab("Plant Age (days)") +
  facet_grid(Cluster ~ Treatment, scales = "free", space = "free") +
  theme_classic() +
  theme(text = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        legend.position = "bottom",
        legend.title.align = 1)
```

Visualize the aggregated relative abundances
```{r}
rs.agg.ra <- rs.otu.tidy %>% 
  inner_join(rs.ord, by = "OTU_ID") %>%
  group_by(Cluster, SampleID) %>% 
  summarise(AggRelAb = sum(Count)) %>% 
  inner_join(rs.map, by = "SampleID") %>% 
  ungroup() %>% 
  mutate(Treatment = str_replace(Treatment, "D", "DS")) %>% 
  mutate(Treatment = fct_relevel(Treatment, "WC"))

rs.agg.means <- rs.agg.ra %>% 
  group_by(Treatment, Compartment, Time, Cluster) %>% 
  mutate(MeanRelAb = mean(AggRelAb)) %>% 
  ungroup() 

rs.clstr.p <- rs.agg.ra %>% 
  ggplot(aes(Age, AggRelAb, color = Treatment)) +
  geom_point(stat = "identity", position = "dodge", alpha = 0.5, shape = 1) +
  geom_line(data = rs.agg.means, aes(y = MeanRelAb), size = 1) +
  geom_vline(xintercept = ws.line, linetype = 3) +
  scale_color_manual(values = trt.pal, name = "Treatment") +
  guides(color = guide_legend(ncol = 2)) +
  ylab("Relative Abundance") +
  xlab("Plant Age (days)") +
  facet_wrap(~ Cluster, ncol = 1, scales = "free") +
  theme_classic() +
  theme(legend.position = "bottom",
        text = element_text(size = 15))

rs.clstr.p
```

```{r}
plot_grid(rs.hm.p, rs.clstr.p,
          nrow = 1,
          rel_widths = c(4,3),
          align = "h",
          axis = "b",
          labels = c("A", "B"),
          label_size = 20)
```


ENDOSPHERE

z-transform relative abundances, calculate the mean z-score for each treatment, and time point combination, generate a matrix for hierarchical clustering
```{r}
es.zs.tidy <- es.otu.tidy %>%
  inner_join(map, by = "SampleID") %>%
  filter(OTU_ID %in% es.sig$OTU_ID) %>%
  group_by(OTU_ID) %>%
  mutate(zscore = (Count - mean(Count))/sd(Count)) %>%
  group_by(Treatment, Time, OTU_ID, Age) %>%
  summarise(MeanZS = mean(zscore)) %>%
  ungroup()

es.zs.mtx <- es.zs.tidy %>% 
    mutate(Group = paste(Treatment, Time, sep = ".")) %>% 
    select(Group, MeanZS, OTU_ID) %>% 
    spread(key = Group, value = MeanZS)
es.zs.mtx <- as.data.frame(es.zs.mtx) 
rownames(es.zs.mtx) <- es.zs.mtx$OTU_ID 
es.zs.mtx <- es.zs.mtx[,-1] 

###################################################################

es.fc.mtx <- es.fc %>% 
  separate(Contrast, c("Treatment2", "Time"), sep = "\\.") %>% 
  mutate(Treatment = Treatment2) %>% 
  filter(Treatment2 != "WC_TRN") %>% 
  filter(OTU_ID %in% es.sig$OTU_ID) %>% 
  mutate(Contrast = paste(Treatment2, Time, sep = ".")) %>% 
  select(Contrast, OTU_ID, estimate) %>% 
  spread(key = Contrast, value = estimate)
es.fc.mtx <- as.data.frame(es.fc.mtx) 
rownames(es.fc.mtx) <- es.fc.mtx$OTU_ID 
es.fc.mtx <- es.fc.mtx[,-1] 
```

Choose a clustering method and number of clusters
```{r}
fviz_nbclust(es.zs.mtx, FUN = hcut, method = "wss")

es.ac <- function(x) {
  agnes(es.zs.mtx, method = x)$ac
}
map_dbl(m, es.ac)

###################################################################
# See what values of k might be worth testing
fviz_nbclust(es.fc.mtx, FUN = hcut, method = "wss")

# See which method gets you the best clustering
es.ac <- function(x) {
  agnes(es.fc.mtx, method = x)$ac
}
map_dbl(m, es.ac)
```

Perform hierarchical clustering and get the set of clusters and order of OTUs
```{r}
es.k <- 6


es.dist <- dist(as.matrix(es.zs.mtx)) 
es.clust <- hclust(es.dist, method = "ward.D") 
es.ord.names <- es.clust$labels[es.clust$order] 
es.ord <- data.frame(OTU_ID = es.ord.names, order = 1:length(es.ord.names))

es.cut <- cutree(es.clust[c(1,2,4)], k = es.k)
es.ord$Cluster <- as.factor(es.cut[es.ord$OTU_ID])

es.ord <- es.ord %>% 
  mutate(Cluster = paste("C", Cluster, sep = "")) %>% 
  group_by(Cluster) %>% 
  mutate(nOTU = n()) %>% 
  ungroup() %>% 
  mutate(Cluster2 = paste(Cluster, " (", nOTU, " OTUs)", sep = ""))

###################################################################

es.k <- 5

es.dist <- dist(as.matrix(es.fc.mtx)) 
es.clust <- hclust(es.dist, method = "ward.D") 
es.ord.names <- es.clust$labels[es.clust$order] 
es.ord <- data.frame(OTU_ID = es.ord.names, order = 1:length(es.ord.names))

es.cut <- cutree(es.clust[c(1,2,4)], k = es.k)
es.ord$Cluster <- as.factor(es.cut[es.ord$OTU_ID])

es.ord <- es.ord %>% 
  mutate(Cluster = paste("C", Cluster, sep = "")) %>% 
  group_by(Cluster) %>% 
  mutate(nOTU = n()) %>% 
  ungroup() %>% 
  mutate(Cluster2 = paste(Cluster, " (", nOTU, " OTUs)", sep = ""))
```

Visualize as heatmap
```{r}
es.hm.p <- es.zs.tidy %>% 
  inner_join(es.ord, by = "OTU_ID") %>% 
  mutate(Treatment = str_replace(Treatment, "D", "DS")) %>% 
  mutate(Treatment = fct_relevel(Treatment, "WC")) %>% 
  ggplot(aes(Time*10, reorder(OTU_ID, order), fill = MeanZS)) +
  geom_tile() +
  geom_vline(data = trt.lines, aes(xintercept = IniTreatment*10), linetype = 3, color = "white") +
  geom_vline(data = trt.lines, aes(xintercept = EndTreatment*10), linetype = 3, color = "white") +
  scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
  #scale_fill_distiller(palette = "Spectral") +
  ylab("Differentially Abundant OTU") +
  xlab("Plant Age (days)") +
  facet_grid(Cluster ~ Treatment, scales = "free", space = "free") +
  theme_classic() +
  theme(text = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        legend.position = "bottom",
        legend.title.align = 1)

es.hm.p

#################################
es.fc %>% 
  separate(Contrast, c("Treatment2", "Time"), sep = "\\.") %>% 
  mutate(Treatment = Treatment2) %>% 
  filter(Treatment2 != "WC_TRN") %>% 
  mutate(Time = as.numeric(Time)) %>% 
  inner_join(es.ord, by = "OTU_ID") %>% 
  mutate(Treatment = str_replace(Treatment, "D", "DS")) %>% 
  mutate(Treatment = fct_relevel(Treatment, "WC")) %>% 
  mutate(estimate2 = ifelse(abs(estimate) >5, 5 * sign(estimate), estimate)) %>% 
  ggplot(aes(Time*10, reorder(OTU_ID, order), fill = estimate2)) +
  geom_tile() +
  geom_vline(data = trt.lines, aes(xintercept = IniTreatment*10), linetype = 3, color = "white") +
  geom_vline(data = trt.lines, aes(xintercept = EndTreatment*10), linetype = 3, color = "white") +
  #scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
  scale_fill_gradientn(name = "log2FC",
                       colors = RColorBrewer::brewer.pal(11, "BrBG")) +
  #scale_fill_distiller(palette = "Spectral") +
  ylab("Differentially Abundant OTU") +
  xlab("Plant Age (days)") +
  facet_grid(Cluster ~ Treatment, scales = "free", space = "free") +
  theme_classic() +
  theme(text = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        legend.position = "bottom",
        legend.title.align = 1)
```

Visualize the aggregated relative abundances
```{r}
es.agg.ra <- es.otu.tidy %>% 
  inner_join(es.ord, by = "OTU_ID") %>%
  group_by(Cluster, SampleID) %>% 
  summarise(AggRelAb = sum(Count)) %>% 
  inner_join(es.map, by = "SampleID") %>% 
  ungroup() %>% 
  mutate(Treatment = str_replace(Treatment, "D", "DS")) %>% 
  mutate(Treatment = fct_relevel(Treatment, "WC"))

es.agg.means <- es.agg.ra %>% 
  group_by(Treatment, Compartment, Time, Cluster) %>% 
  mutate(MeanRelAb = mean(AggRelAb)) %>% 
  ungroup() 

es.clstr.p <- es.agg.ra %>% 
  ggplot(aes(Age, AggRelAb, color = Treatment)) +
  geom_point(stat = "identity", position = "dodge", alpha = 0.5, shape = 1) +
  geom_line(data = es.agg.means, aes(y = MeanRelAb), size = 1) +
  geom_vline(xintercept = ws.line, linetype = 3) +
  scale_color_manual(values = trt.pal, name = "Treatment") +
  guides(color = guide_legend(ncol = 2)) +  
  ylab("Relative Abundance") +
  xlab("Plant Age (days)") +
  facet_wrap(~ Cluster, ncol = 1, scales = "free") +
  theme_classic() +
  theme(legend.position = "bottom",
        text = element_text(size = 15))

es.clstr.p
```

```{r}
plot_grid(es.hm.p, es.clstr.p,
          nrow = 1,
          rel_widths = c(3,2),
          align = "h",
          axis = "b",
          labels = c("A", "B"),
          label_size = 20)
```


```{r}
all.ord <- rbind(mutate(rs.ord, Compartment = "RS"),
                 mutate(es.ord, Compartment = "ES"))

rbind(mutate(rs.fc, Compartment = "RS"),
      mutate(es.fc, Compartment = "ES")) %>% 
  separate(Contrast, c("Treatment2", "Time"), sep = "\\.", remove = F) %>% 
  mutate(Treatment = Treatment2) %>% 
  filter(Treatment2 != "WC_TRN") %>% 
  mutate(Time = as.numeric(Time)) %>% 
  inner_join(all.ord, by = c("Compartment", "OTU_ID")) %>% 
  mutate(Contrast = case_when(Treatment == "D1" ~ "WC vs DS1",
                              Treatment == "D2" ~ "WC vs DS2",
                              Treatment == "D3" ~ "WC vs DS3")) %>% 
  mutate(Compartment = fct_relevel(Compartment, "RS")) %>% 
  mutate(estimate2 = ifelse(abs(estimate) >4, 4 * sign(estimate), estimate)) %>% 
  ggplot(aes(Time*10, reorder(OTU_ID, order), fill = estimate2)) +
  geom_tile() +
  geom_vline(data = trt.lines, aes(xintercept = IniTreatment * 10), linetype = 3) +
  geom_vline(data = trt.lines, aes(xintercept = EndTreatment * 10), linetype = 3) +
  #scale_fill_viridis_c(name = "Mean Rel. Abund.\n(z-score)") +
  scale_fill_gradientn(name = "log2FC",
                       colors = RColorBrewer::brewer.pal(11, "BrBG")) +
  #scale_fill_distiller(palette = "Spectral") +
  ylab("Differentially Abundant OTU") +
  xlab("Plant Age (days)") +
  facet_grid(Compartment + Cluster ~ Contrast, scales = "free", space = "free") +
  theme_classic() +
  theme(text = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        legend.position = "bottom",
        legend.title.align = 1)

all.ord %>% 
  inner_join(tax, by = "OTU_ID") %>% 
  ggplot(aes(1, fill = PhyClass2)) +
  geom_bar(position = "stack") +
  facet_grid(Compartment + Cluster ~ ., scales = "free", space = "free") +
  scale_fill_manual(name = "Taxon",
                    values = phy.pal[c(11:14,2:10,1)],
                    limits = levels(tax$PhyClass2)[c(11:14,2:10,1)]) +
  theme_classic() +
  theme(text = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        legend.position = "bottom",
        legend.title.align = 1)
```


Generate a data frame with all clustering results, and label the interesting clusters for downstreama analyses
Genearte a data frame with all the mean z-scores for plotting
```{r}
all.clust.summary <- rbind(mutate(rs.ord, Compartment = "RS"), 
                           mutate(es.ord, Compartment = "ES")) %>% 
  mutate(Cluster = paste(Compartment, Cluster, sep = "")) %>%
  filter(Cluster %in% c("RSC1", "RSC2", "RSC4", "ESC3", "ESC5")) %>% 
  mutate(Trend = case_when(Cluster == "RSC1" ~ "Transient Enrichment",
                           Cluster == "RSC2" ~ "Persistent Depletion",
                           Cluster == "RSC4" ~ "Transient Depletion",
                           Cluster == "ESC3" ~ "Semi-Persistent Enrichment",
                           Cluster == "ESC5" ~ "Persistent Depletion")) 

all.cluster.total <- all.clust.summary %>% 
  group_by(Cluster, nOTU, Cluster2, Compartment, Trend) %>% 
  count() %>% 
  select(-n) %>% 
  ungroup() %>% 
  mutate(Compartment = fct_recode(Compartment, 
                                  "Rhizosphere" = "RS",
                                  "Endosphere" = "ES")) %>% 
  mutate(Compartment = fct_relevel(Compartment, "Rhizosphere"))

all.agg.ra <- rbind(mutate(rs.agg.ra, Compartment = "RS"),
                    mutate(es.agg.ra, Compartment = "ES")) %>% 
  mutate(Cluster = paste(Compartment, Cluster, sep = ""))

saveRDS(all.clust.summary, "../Data/drought_clusters.RDS")

###############################################################################

all.clust.summary <- rbind(mutate(rs.ord, Compartment = "RS"), 
                           mutate(es.ord, Compartment = "ES")) %>% 
  mutate(Cluster = paste(Compartment, Cluster, sep = "")) %>%
  filter(Cluster %in% c("RSC1", "RSC2", "RSC4", "ESC3", "ESC4")) %>% 
  mutate(Trend = case_when(Cluster == "RSC1" ~ "Transient Enrichment",
                           Cluster == "RSC2" ~ "Transient Depletion",
                           Cluster == "RSC4" ~ "Persistent Depletion",
                           Cluster == "ESC3" ~ "Semi-Persistent Enrichment",
                           Cluster == "ESC4" ~ "Persistent Depletion")) 

all.cluster.total <- all.clust.summary %>% 
  group_by(Cluster, nOTU, Cluster2, Compartment, Trend) %>% 
  count() %>% 
  select(-n) %>% 
  ungroup() %>% 
  mutate(Compartment = fct_recode(Compartment, 
                                  "Rhizosphere" = "RS",
                                  "Endosphere" = "ES")) %>% 
  mutate(Compartment = fct_relevel(Compartment, "Rhizosphere"))

all.agg.ra <- rbind(mutate(rs.agg.ra, Compartment = "RS"),
                    mutate(es.agg.ra, Compartment = "ES")) %>% 
  mutate(Cluster = paste(Compartment, Cluster, sep = ""))

saveRDS(all.clust.summary, "../Data/drought_clusters.RDS")
```

```{r}
all.cluster.means <- all.agg.ra %>%
  mutate(Compartment = fct_recode(Compartment, 
                                  "Rhizosphere" = "RS",
                                  "Endosphere" = "ES")) %>% 
  mutate(Compartment = fct_relevel(Compartment, "Rhizosphere")) %>% 
  inner_join(select(all.cluster.total, -Compartment), by = "Cluster") %>% 
  group_by(Compartment, Time, Age, Treatment2, Trend) %>% 
  summarise(MeanRelAb = mean(AggRelAb)) %>% 
  ungroup() %>% 
  mutate(Treatment2 = str_replace(Treatment2, "D", "DS")) %>% 
  mutate(Treatment2 = fct_relevel(Treatment2, "WC")) 

all.cluster.ab <- all.agg.ra %>% 
  mutate(Compartment = fct_recode(Compartment, 
                                  "Rhizosphere" = "RS",
                                  "Endosphere" = "ES")) %>% 
  mutate(Compartment = fct_relevel(Compartment, "Rhizosphere")) %>% 
  inner_join(select(all.cluster.total, -Compartment), by = "Cluster") %>% 
  mutate(Treatment2 = str_replace(Treatment2, "D", "DS")) %>% 
  mutate(Treatment2 = fct_relevel(Treatment2, "WC"))

all.cluster.tax <- all.clust.summary %>% 
  mutate(Compartment = fct_relevel(Compartment, "Rhizosphere")) %>% 
  group_by(Compartment, Cluster, OTU_ID) %>% 
  count() %>% 
  ungroup() %>% 
  inner_join(tax, by = "OTU_ID") %>% 
  inner_join(select(all.clust.summary, OTU_ID, Trend, Compartment), by = c("Compartment", "OTU_ID")) %>% 
  mutate(Compartment = fct_recode(Compartment, 
                                  "Rhizosphere" = "RS",
                                  "Endosphere" = "ES")) %>% 
  mutate(Compartment = fct_relevel(Compartment, "Rhizosphere"))
```

```{r}
all.ab <- all.cluster.ab %>% 
  ggplot(aes(Age, AggRelAb, color = Treatment2)) +
  geom_point(alpha = 1, shape = 1, size = 2) +
  geom_vline(xintercept = ws.line, linetype = 3) +
  geom_line(data = all.cluster.means, aes(y = MeanRelAb), size = 1) + 
  ylab("Cumulative Relative Abundance") +
  xlab("Plant Age (days)") +
  facet_wrap(~ Compartment + Trend, scales = "free") +
  scale_color_manual(values = trt.pal, name = "Treatment") +
  guides(color = guide_legend(title.position = "top", title.hjust = 0.5)) +
  theme_classic() +
  theme(legend.position = c(0.8,0.2),
        text = element_text(size = 15))

all.ab

all.tax.p <- all.cluster.tax %>% 
  group_by(Compartment, Trend, PhyClass2) %>% 
  summarise(Count = sum(n)) %>% 
  group_by(Compartment, Trend) %>% 
  mutate(Fraction = Count / sum(Count)) %>% 
  mutate(ymax = cumsum(Fraction),
         nPhy = n()) %>% 
  mutate(ymin = c(0, ymax[1:nPhy - 1])) %>% 
  ggplot() +
  geom_rect(aes(ymax=ymax, ymin=ymin, xmax= 4, xmin= 3, fill= PhyClass2)) +
  geom_text(data = all.cluster.total, aes(2, 0, label = nOTU), size = 7) +
  scale_fill_manual(name = "Taxon",
                    values = phy.pal[c(11:14,2:10,1)],
                    limits = levels(tax$PhyClass2)[c(11:14,2:10,1)]) +
  guides(fill = guide_legend(ncol = 4)) +
  coord_polar(theta="y") + 
  xlim(c(2, 4)) +
  facet_wrap(. ~ Compartment + Trend, nrow = 1) +
  theme_void() +
  theme(text = element_text(size = 15),
        legend.position = "bottom",
        legend.title = element_blank(),
        strip.background = element_blank(),
        strip.text = element_blank())

all.tax.p

all.clust.p <- plot_grid(all.ab, all.tax.p, 
                         ncol = 1,
                         rel_heights = c(6,4),
                         align = "v",
                         axis = "lr",
                         labels = "B",
                         label_size = 20)

plot_grid(dao.p, all.ab, all.tax.p, 
          ncol = 1, 
          rel_heights = c(1,2,1),
          labels = c("A", "B", NA),
          label_size = 20) ###766:936
```

```{r}
all.cluster.tax %>% 
  group_by(Compartment, Trend, Phylum, PhyClass2, Class, Order) %>% 
  count() %>% 
  filter(n > 1) %>% 
  mutate(Taxonomy = paste(Phylum, Class, Order, sep = " / ")) %>% 
  ggplot(aes(Trend, Taxonomy, fill = PhyClass2)) +
  geom_tile(color = "white", size = 1) +
  geom_text(aes(label = n)) +
  scale_fill_manual(name = "Taxon",
                    values = phy.pal,
                    limits = levels(tax$PhyClass2)) +
  facet_grid(. ~ Compartment, scales = "free", space = "free") +
  theme_minimal() +
  theme(text = element_text(size = 15),
        axis.text.x = element_text(angle = 45, hjust = 1))
```


```{r}
supp.clust.summary <- rbind(mutate(rs.ord, Compartment = "RS"), 
                           mutate(es.ord, Compartment = "ES")) %>% 
  mutate(Cluster = paste(Compartment, Cluster, sep = "")) %>%
  #filter(Cluster %in% c("RSC1", "RSC2", "RSC4", "ESC3", "ESC4")) %>% 
  mutate(Trend = case_when(Cluster == "RSC1" ~ "Transient Enrichment",
                           Cluster == "RSC2" ~ "Transient Depletion",
                           Cluster == "RSC4" ~ "Persistent Depletion",
                           Cluster == "ESC3" ~ "Semi-Persistent Enrichment",
                           Cluster == "ESC4" ~ "Persistent Depletion")) %>% 
  select(Compartment, Cluster, Trend, OTU_ID) %>% 
  inner_join(tax, by = "OTU_ID") %>% 
  select(-PhyClass, -PhyClass2) 

write.table(supp.clust.summary, "../Tables/dao_clstr.tsv", sep = "\t", quote = F, row.names = F)
```

